import pandas as pd
from utils.excel_converter.py import excel_to_csv

def load_raw_data(file_path):
    """Load the raw dataset.

    Reads the CSV at ``file_path`` with ``pandas.read_csv`` using its default
    options and returns the resulting DataFrame.
    """
    raw_frame = pd.read_csv(file_path)
    return raw_frame

def clean_data(df):
    """Clean the data.

    Drops every row that contains a missing value, then drops duplicate rows
    (``drop_duplicates`` defaults, i.e. the first occurrence is kept). The
    input frame is not modified; a new DataFrame is returned and the surviving
    rows keep their original index labels.
    """
    return df.dropna().drop_duplicates()

def save_processed_data(df, file_path):
    """Save the processed data to ``file_path`` as CSV, without the index.

    Missing parent directories of a filesystem path are created first, so
    writing into an output folder that does not exist yet no longer fails
    with ``OSError``.

    Args:
        df: DataFrame to persist.
        file_path: Destination path (``str`` or ``os.PathLike``). Any other
            target accepted by ``DataFrame.to_csv`` (e.g. an open text
            buffer) is passed through unchanged.
    """
    import os  # function-scope import keeps this block self-contained

    if isinstance(file_path, (str, os.PathLike)):
        parent_dir = os.path.dirname(os.fspath(file_path))
        # A bare filename has no parent component; nothing to create then.
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
    df.to_csv(file_path, index=False)

if __name__ == "__main__":
    # Input and output locations used by this run.
    excel_file_path = "data/raw/ssq/lottery_data.xlsx"
    sheet_name = "Sheet1"
    raw_csv_path = "data/raw/ssq/lottery_data.csv"
    processed_data_path = "data/processed/ssq/cleaned_data.csv"

    # The raw data originates from an Excel workbook: convert it to CSV first.
    excel_to_csv(excel_file_path, sheet_name, raw_csv_path)

    # Load the converted CSV, clean it, and write the result.
    cleaned = clean_data(load_raw_data(raw_csv_path))
    save_processed_data(cleaned, processed_data_path)